package com.gglu.www.task.processor;

import com.baomidou.mybatisplus.mapper.EntityWrapper;
import com.gglu.www.model.webmagic.Article;
import com.gglu.www.service.webmagic.ArticleService;
import com.gglu.www.util.DateUtil;
import com.gglu.www.util.SpringContextUtil;
import com.gglu.www.util.StringUtils;
import com.google.common.base.Strings;
import org.apache.log4j.Logger;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.scheduling.annotation.Scheduled;
import org.springframework.stereotype.Component;
import us.codecraft.webmagic.Page;
import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.Spider;
import us.codecraft.webmagic.pipeline.ConsolePipeline;
import us.codecraft.webmagic.processor.PageProcessor;

import java.util.ArrayList;
import java.util.Date;
import java.util.List;

/**
 * 杭州新苗网络科技有限公司
 *
 * @author meihf
 * @create 2018/1/28
 * @description
 */
@Component
public class PageHtmlForMeiHuaWang implements PageProcessor{

    private static final String MEIHUA_WANG = "https://www.meihua.info/";

    private Logger logger = Logger.getLogger(PageHtmlForMeiHuaWang.class);

    private Site site = Site.me().setDomain(MEIHUA_WANG);

    @Autowired
    ArticleService articleService;

    private static List<String> urlLinks = new ArrayList<>();

    private static List<Article> articleList = new ArrayList<>();

//    @Scheduled(cron = "0/1 * * * * ?")
//    @PostConstruct
    @Scheduled(fixedDelay = 43200000)
    public void saveContentForMeiHuaWang(){
        logger.info("开始抓取梅花网前两页信息");
//        Spider.create(new PageHtmlForMeiHuaWang()).addUrl(MEIHUA_WANG+"/a/71032")
//                .addPipeline(new ConsolePipeline()).run();

//        for (int i=62;i<=3279;i++){
//            if (i%5==0){
//                articleService = (ArticleService) SpringContextUtil.getBean("articleService");
//                articleService.insertBatch(articleList);
//                articleList = new ArrayList<>();
//            }
//            Spider.create(new PageHtmlForMeiHuaWang()).addUrl(MEIHUA_WANG+"/?p="+i)
//                    .addPipeline(new ConsolePipeline()).run();
//        }

        Spider.create(new PageHtmlForMeiHuaWang()).addUrl(MEIHUA_WANG+"/?p="+1)
        .addPipeline(new ConsolePipeline()).thread(1).run();
        //这里不能用多线程否则插入数据库中会出现多条重复记录

    }

    public static void main(String args[]) {
        Spider.create(new PageHtmlForMeiHuaWang()).addUrl(MEIHUA_WANG+"/?p="+1)
                .addPipeline(new ConsolePipeline()).thread(1).run();
        //这里不能用多线程否则插入数据库中会出现多条重复记录
    }

    @Override
    public void process(Page page) {
                articleService = (ArticleService) SpringContextUtil.getBean("articleService");

//        List<String> nextLinks = page.getHtml().$(".text-right").links().all();
//        List<String> articleLinks = new ArrayList<>();
//        if (nextLinks.size() == 0){
//           Integer id = Integer.valueOf(page.getUrl().regex("\\d+").toString());
//            articleLinks.add("https://www.meihua.info/a/"+(id-1));
//        }else {
//            articleLinks = page.getHtml().$(".text-right").links().all().subList(0,1);
//        }

        String title = page.getHtml().xpath("//div[@id='content']/div[@class='art-detail']/div[@class='title']/text()").toString();
        String summary = page.getHtml().xpath("//div[@id='content']/div[@class='art-detail']/div[@class='summary']/text()").toString();
        String art_content = page.getHtml().xpath("//div[@id='content']/div[@class='art-detail']/" +
                "div[@class='article']/div[@class='art-content']/p/tidyText()").all().toString();

        String scan =page.getHtml().xpath("//span[@class='sub-view']/span[@class='subinfo_item']/text()").regex("\\d+").toString();
        String writeDate = page.getHtml().xpath("//span[@class='subinfo_item abc']/text()").toString();
        //如果没有包含年就是当前年
        if (!Strings.isNullOrEmpty(writeDate)){
            if (writeDate.contains("年")){
                writeDate = StringUtils.replaceChinese(writeDate);
            }else {
                String writeDateYear = DateUtil.getNowTimeStr(DateUtil.YYYY_MM_DD).substring(0,4);
                writeDate = writeDateYear+"-" + StringUtils.replaceChinese(writeDate);
            }
        }

        String url = page.getUrl().get();
        String workTitle = page.getHtml().xpath("//div[@class='wd_top_inner']/div[@class='wd_title']/text()").toString();
        String workArtContent = page.getHtml().xpath("//div[@class='main-left']/div[@class='wd_text']/p/tidyText()").all().toString();
        String workStart = page.getHtml().xpath("//span[@id='span_btn_works_collect']/span[@id='span_works_collectCount']/text()").toString();
        String workPraise = page.getHtml().xpath("//span[@id='span_btn_works_praise']/span[@id='span_works_praiseCount']/text()").toString();
        String workScan = page.getHtml().xpath("//div[@class='wd_top_i']/span[4]/label[2]/text()").toString();

        Article article = new Article();
        if ((!Strings.isNullOrEmpty(title) && !Strings.isNullOrEmpty(summary))){
            article.setTitle(title);
            article.setContent(art_content);
            article.setSummary(summary);
            article.setScan(scan);
            if (Strings.isNullOrEmpty(writeDate)){
                article.setWriteDate(new Date());
            }else {
                article.setWriteDate(DateUtil.format2Date(writeDate,DateUtil.YYYY_MM_DD));
            }
        }else if ((!Strings.isNullOrEmpty(workTitle)) && !Strings.isNullOrEmpty(workArtContent)){
            article.setContent(workArtContent);
            article.setTitle(workTitle);
            article.setScan(workScan);
            article.setStar(workStart);
            article.setPraise(workPraise);
        }
        article.setUrl(url);
        article.setCreateDate(new Date());
        //判断文字是否已经保存
        if (!Strings.isNullOrEmpty(article.getTitle()) && !Strings.isNullOrEmpty(article.getUrl())){
            EntityWrapper entityWrapper = new EntityWrapper();
            entityWrapper.where(" title={0} and url={1} ",article.getTitle(),article.getUrl());
            int articleCount =  articleService.selectCount(entityWrapper);
            if (articleCount==0){
                articleService.insert(article);
            }
        }

        //先按页码把所有链接扒下来，然后集中去处理
//        List<String> links = page.getHtml().xpath("//ul[@class='works_list']/li[@class='wi_li']/a/@href").all();
//        urlLinks.addAll(links);
//
//        获取下一页  每天定时获取两页的文章如果存在就不插入
        if(!url.substring(url.length()-1,url.length()).equals("2")){
            String nextUrl = MEIHUA_WANG+"/?"+page.getHtml().xpath("//span[@class='btn btn-default item next']/@onclick").regex("page=\\d+").toString();
            page.addTargetRequest(nextUrl);
        }

        List<String> links = page.getHtml().xpath("//div[@class='news-list-list']/ul[@class='list news-list']/" +
                "li[@class='item']/div[@class='article-img']/a/@href").all();
        page.addTargetRequests(links);

    }

    @Override
    public Site getSite() {
        return site;
    }
}
